#
# You can use this CREATE statement for the "url" table instead of the
# default one. This structure is useful for huge "cache mode" databases
# with several million URLs.
#
#   New features of this scheme:
#
# * Support for MySQL RAID to break 2/4G data file size limit.
# * Relatively small "url.MYI" index file size:
#     there is no unique index on "url" field.
# * Quick search for expired documents at indexing time using 
#     "key_next_index_time" index. It significantly improves 
#     indexing speed for big databases.
# * UNIQUE rec_id is generated in indexer using CRC32(url)
# * It turns on large file MySQL support for "url" table.
#
# Disadvantage:
#  * This scheme will probably lose some documents, because the CRC32
#  algorithm which is used for rec_id generation can give the same value
#  for different URLs. According to our tests it gives approximately
#  100 URL pairs with the same CRC32 within 3.5 million unique URLs.
#  It means that about 0.0028% of documents will be lost.
#
# Requires:
# * Specify "--with-raid" and omit "--disable-large-files" when
#      installing MySQL.
# * Use "UseCRC32UrlID yes" command in your indexer.conf
#


# Remove any previous "url" table so the CREATE below starts clean.
# IF EXISTS keeps the script usable on a fresh database, where a plain
# DROP TABLE would fail with "Unknown table" and may abort the run.
DROP TABLE IF EXISTS url;

# "url" table variant for very large "cache mode" databases.
#
# rec_id is not generated by MySQL: per the notes at the top of this file
# the indexer supplies it as CRC32(url) ("UseCRC32UrlID yes").
CREATE TABLE url (
  rec_id int(11) DEFAULT '0' NOT NULL,
  status int(11) DEFAULT '0' NOT NULL,
  url char(128) DEFAULT '' NOT NULL,
  content_type char(48) DEFAULT '' NOT NULL,
  docsize int(11) DEFAULT '0' NOT NULL,
  # The *_time columns carry an explicit default like every other column,
  # so an INSERT that omits them also works under strict SQL mode.
  last_index_time INT DEFAULT '0' NOT NULL,
  next_index_time INT DEFAULT '0' NOT NULL,
  last_mod_time INT DEFAULT '0' NOT NULL,
  referrer int(11) DEFAULT '0' NOT NULL,
  tag char(16) DEFAULT '0' NOT NULL,
  hops int(11) DEFAULT '0' NOT NULL,
  category char(16) DEFAULT '' NOT NULL,
  crc32 int(11) DEFAULT '0' NOT NULL,
  lang char(32) DEFAULT '' NOT NULL,
  charset char(40) DEFAULT '' NOT NULL,
  seed smallint(6) DEFAULT '0' NOT NULL,
  bad_since_time INT DEFAULT '0' NOT NULL,
  PRIMARY KEY (rec_id),
  # Deliberately no index on "url": this scheme trades it away to keep
  # url.MYI small, and a UNIQUE index on char(128) would also reject
  # distinct URLs that share their first 128 characters.
  KEY key_crc (crc32),
  KEY key_referrer (referrer),
  KEY key_bad_since_time (bad_since_time),
  # Lets the indexer find expired documents without a full table scan.
  KEY key_next_index_time (next_index_time)
)
  # Stripe the data file across 16 chunks to get past the 2/4G file size
  # limit; requires a MySQL server built with "--with-raid".
  RAID_TYPE=RAID0 RAID_CHUNKS=16 RAID_CHUNKSIZE=256
  # Size hints for the storage engine; together they ask for a table that
  # can grow well beyond the default data file size.
  MAX_ROWS=100000000
  AVG_ROW_LENGTH=512
;

